In [1]:
import numpy as np
from keras.datasets import imdb
In [2]:
# Getting IMDB data
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words = 10000)
In [3]:
# Vectorizing the sequences (multi-hot encoding)
def vectorize_sequences(sequences, dimension = 10000):
    # Create an all-zero matrix of shape (len(sequences), dimension)
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.  # set the word indices of results[i] to 1s
    return results
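To make the multi-hot encoding concrete, here is a quick sanity check (a sketch using the function defined above, with a made-up two-word review): a sequence containing word indices 3 and 5 becomes a 10,000-dimensional vector with 1s at positions 3 and 5 and 0s everywhere else.

# Illustrative check of the multi-hot encoding
demo = vectorize_sequences([[3, 5]])
print(demo.shape)   # (1, 10000)
print(demo[0, :8])  # [0. 0. 0. 1. 0. 1. 0. 0.]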
In [4]:
# Vectorizing the input datasets
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)
In [5]:
# Converting the labels to float32 arrays
y_train = np.asarray(train_labels)
y_train = y_train.astype('float32')
y_test = np.asarray(test_labels)
y_test = y_test.astype('float32')
In [6]:
# Keras imports
from keras.models import Sequential
from keras.layers import Dense
In [7]:
# Defining the original model
def build_original_model():
    model = Sequential()
    model.add(Dense(units = 16,
                    activation = 'relu',
                    input_shape = (10000,)))
    model.add(Dense(units = 16,
                    activation = 'relu'))
    model.add(Dense(units = 1,
                    activation = 'sigmoid'))
    model.compile(optimizer = 'rmsprop',
                  loss = 'binary_crossentropy',
                  metrics = ['acc'])
    return model
In [8]:
# Defining a small model
def build_small_model():
    model = Sequential()
    model.add(Dense(units = 4,
                    activation = 'relu',
                    input_shape = (10000,)))
    model.add(Dense(units = 4,
                    activation = 'relu'))
    model.add(Dense(units = 1,
                    activation = 'sigmoid'))
    model.compile(optimizer = 'rmsprop',
                  loss = 'binary_crossentropy',
                  metrics = ['acc'])
    return model
In [9]:
# Defining a big model
def build_big_model():
    model = Sequential()
    model.add(Dense(units = 512,
                    activation = 'relu',
                    input_shape = (10000,)))
    model.add(Dense(units = 512,
                    activation = 'relu'))
    model.add(Dense(units = 1,
                    activation = 'sigmoid'))
    model.compile(optimizer = 'rmsprop',
                  loss = 'binary_crossentropy',
                  metrics = ['acc'])
    return model
In [10]:
# Initializing the original network
original_network = build_original_model()
In [11]:
# Training the original network
original_network_history = original_network.fit(x_train,
                                                y_train,
                                                epochs = 20,
                                                batch_size = 512,
                                                validation_data = (x_test, y_test))
In [12]:
small_network = build_small_model()
In [13]:
small_network_history = small_network.fit(x_train,
                                          y_train,
                                          epochs = 20,
                                          batch_size = 512,
                                          validation_data = (x_test, y_test))
In [14]:
epochs = range(1, 21)
original_val_loss = original_network_history.history['val_loss']
small_model_val_loss = small_network_history.history['val_loss']
In [15]:
import matplotlib.pyplot as plt
plt.figure(figsize = (10, 6))
# Plotting the validation loss of the original network
# b+ is for "blue cross"
plt.plot(epochs,
         original_val_loss,
         'b+',
         label = 'Original model')
# Plotting the validation loss of the small network
# "bo" is for "blue dot"
plt.plot(epochs,
         small_model_val_loss,
         'bo',
         label = 'Small model')
plt.xlabel('Epochs')
plt.ylabel('Validation loss')
plt.legend()
plt.show()
The smaller network starts overfitting later than the original network, and its performance degrades more slowly once it does. Let's now investigate the opposite scenario: a network with much more capacity.
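To quantify "later", we can locate the epoch at which each model's validation loss bottoms out (a quick sketch over the histories computed above; the exact epochs vary from run to run):

# Epoch of minimum validation loss (1-indexed)
print('Original model:', np.argmin(original_val_loss) + 1)
print('Small model:', np.argmin(small_model_val_loss) + 1)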
In [16]:
# Initializing the big network
big_network = build_big_model()
In [17]:
big_network_history = big_network.fit(x_train,
                                      y_train,
                                      epochs = 20,
                                      batch_size = 512,
                                      validation_data = (x_test, y_test))
In [18]:
# Comparison of validation losses
big_model_val_loss = big_network_history.history['val_loss']
plt.figure(figsize = (10, 6))
plt.plot(epochs,
         original_val_loss,
         'b+',
         label = 'Original model')
plt.plot(epochs,
         big_model_val_loss,
         'bo',
         label = 'Big model')
plt.xlabel('Epochs')
plt.ylabel('Validation loss')
plt.legend()
plt.show()
In [19]:
# Comparison of training losses
original_train_loss = original_network_history.history['loss']
bigger_model_train_loss = big_network_history.history['loss']
plt.figure(figsize = (10, 6))
plt.plot(epochs,
         original_train_loss,
         'b+',
         label = 'Original model')
plt.plot(epochs,
         bigger_model_train_loss,
         'bo',
         label = 'Big model')
plt.xlabel('Epochs')
plt.ylabel('Training loss')
plt.legend()
plt.show()
The bigger network fits the training data faster, but it overfits much more severely.
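One way to summarize this in a single number is the gap between validation and training loss at the final epoch (a sketch over the values computed above; results vary per run):

# Generalization gap (val_loss - train_loss) at the last epoch
print('Original model gap:', original_val_loss[-1] - original_train_loss[-1])
print('Big model gap:', big_model_val_loss[-1] - bigger_model_train_loss[-1])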
In [20]:
from keras.regularizers import l2
In [21]:
# Building the original network with weight regularization
def build_original_model_with_l2():
    model = Sequential()
    model.add(Dense(units = 16,
                    activation = 'relu',
                    # Parameter l -> L2 regularization factor
                    kernel_regularizer = l2(l = 0.001),
                    input_shape = (10000,)))
    model.add(Dense(units = 16,
                    activation = 'relu',
                    kernel_regularizer = l2(l = 0.001)))
    model.add(Dense(units = 1,
                    activation = 'sigmoid'))
    model.compile(optimizer = 'rmsprop',
                  loss = 'binary_crossentropy',
                  metrics = ['acc'])
    return model
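With l2(l = 0.001), every coefficient in the layer's weight matrix adds 0.001 * coefficient**2 to the total loss of the network. A minimal numpy illustration of the penalty on a toy weight matrix w (not the model's actual weights):

# Illustrative L2 penalty on a hypothetical weight matrix
w = np.random.randn(4, 3)
print(0.001 * np.sum(np.square(w)))

Note that the penalty is only added at training time, so the regularized network's training loss will sit above its test-time loss.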
In [22]:
original_network_with_l2 = build_original_model_with_l2()
In [23]:
original_network_with_l2_history = original_network_with_l2.fit(x_train,
                                                                y_train,
                                                                epochs = 20,
                                                                batch_size = 512,
                                                                validation_data = (x_test, y_test))
In [24]:
original_network_with_l2_val_loss = original_network_with_l2_history.history['val_loss']
plt.figure(figsize = (10, 6))
plt.plot(epochs,
         original_val_loss,
         'b+',
         label = 'Original model')
plt.plot(epochs,
         original_network_with_l2_val_loss,
         'bo',
         label = 'L2-regularized model')
plt.xlabel('Epochs')
plt.ylabel('Validation loss')
plt.legend()
plt.show()
In [25]:
from keras.layers import Dropout
In [26]:
# Defining the original model with dropout
def build_original_model_with_dropout():
    model = Sequential()
    model.add(Dense(units = 16,
                    activation = 'relu',
                    input_shape = (10000,)))
    # Parameter rate -> fraction of the input units to drop
    model.add(Dropout(rate = 0.5))
    model.add(Dense(units = 16,
                    activation = 'relu'))
    model.add(Dropout(rate = 0.5))
    model.add(Dense(units = 1,
                    activation = 'sigmoid'))
    model.compile(optimizer = 'rmsprop',
                  loss = 'binary_crossentropy',
                  metrics = ['acc'])
    return model
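Under the hood, dropout randomly zeroes a fraction of a layer's outputs during training. Keras uses "inverted" dropout: the surviving activations are scaled up by 1 / (1 - rate) at training time, so nothing needs to change at test time. A rough numpy sketch of the idea, using a made-up activation matrix:

# Rough sketch of inverted dropout at training time
rate = 0.5
layer_output = np.random.rand(2, 4)                                # hypothetical activations
mask = np.random.binomial(1, 1 - rate, size = layer_output.shape)  # keep-mask
layer_output = layer_output * mask / (1 - rate)                    # drop and rescale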
In [27]:
original_model_with_dropout = build_original_model_with_dropout()
In [28]:
original_model_with_dropout_history = original_model_with_dropout.fit(x_train,
                                                                      y_train,
                                                                      epochs = 20,
                                                                      batch_size = 512,
                                                                      validation_data = (x_test, y_test))
In [29]:
original_model_with_dropout_val_loss = original_model_with_dropout_history.history['val_loss']
plt.figure(figsize = (10, 6))
plt.plot(epochs, original_val_loss, 'b+', label='Original model')
plt.plot(epochs, original_model_with_dropout_val_loss, 'bo', label='Dropout-regularized model')
plt.xlabel('Epochs')
plt.ylabel('Validation loss')
plt.legend()
plt.show()
The model with dropout starts overfitting later than the original model.
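As before, we can check where each validation-loss curve bottoms out (values vary per run):

print('Original model:', np.argmin(original_val_loss) + 1)
print('Dropout model:', np.argmin(original_model_with_dropout_val_loss) + 1)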